STM Exploration

Topic Summary

First, let’s load our STM results from Part 1.

load("~/Dropbox (UNC Charlotte)/NCStateSenateFacebook/data/stmFit.RData")
load("~/Dropbox (UNC Charlotte)/NCStateSenateFacebook/data/out.RData")

Let’s explore the size of the topics by their topic proportions.

library(stm)
plot.STM(stmFit, type = "summary", xlim = c(0,.12), n = 5, #labeltype = "frex",
         main = "NC State Senators' Topics on Facebook", text.cex = 0.8)

I’ve assigned labels to the topics based on my interpretation of the word-topic probabilities (see next section).

topicNames <- labelTopics(stmFit)
k <- 40
topic <- data.frame(
  topicnames = c("Press Conference",
                 "Local Government",
                 "Religious Freedom",
                 "#WeAreNotThis",
                 "Positive Outlook",
                 "Pat McCrory",
                 "Legislation",
                 "Gerrymandering",
                 "Supreme Court",
                 "Voter ID/Fraud",

                 "Presidential Election",
                 "Campaign Support",
                 "Health Care",
                 "Congressional Elections",
                 "North Carolina",
                 "HB2",
                 "Constituent Newsletter",
                 "Sen Van Duyn Posts",
                 "God, Family, Freedom",
                 "Teacher Pay",

                 "#NCPOL and #NCGA",
                 "Hurricane Matthew",
                 "Redistricting",
                 "Taxes",
                 "Congratulations",
                 "Bathroom Safety",
                 "Economy/Jobs",
                 "Student/Women's Issues",
                 "Hillary Clinton",
                 "Church",

                 "Civil Rights",
                 "Energy Tax Credits",
                 "Roy Cooper",
                 "Get Out the Vote",
                 "Gun Violence",
                 "Berger Press Releases",
                 "Public Assistance",
                 "Town Hall Events",
                 "Holiday Wishes",
                 "Conservative Values"),
  TopicNumber = 1:k,
  TopicProportions = colMeans(stmFit$theta))

Interpretating the Topics

par(mfrow = c(4,2),mar = c(1, 1, 2, 1))
for (i in 1:k){
  plot.STM(stmFit, type = "labels", n = 15, 
           topics = i, main = paste0(topic$topicnames[i]," - Raw Probabilities"),
           width = 55)
  plot.STM(stmFit, type = "labels", n = 15, 
           topics = i, main = paste0(topic$topicnames[i]," - FREX"), 
           labeltype = "frex", width = 55)
}

#shortdoc <- substr(subset(fbData$ID,out$meta$ID),1,200))
#thoughts3 <- findThoughts(stmFit, texts = shortdoc, n = 2, topics = 2)
par(mfrow = c(1,1),mar = c(2, 2, 2, 2))
topicQuality(stmFit,
             documents = out$documents, 
             main = "Topic Interpretability: Exclusivity and Semantic Coherence")
##  [1] -117.69762  -99.02861 -108.36939 -113.10056 -119.04781  -83.70658
##  [7] -114.95785 -106.45754  -94.32849  -83.56333  -60.37099 -107.18429
## [13]  -85.71741  -96.02803  -97.72806  -96.74052 -133.55053 -109.95572
## [19] -131.66939  -68.16150 -170.58757 -131.58723  -89.55902  -85.80290
## [25] -137.62628  -63.21635  -80.50761 -141.80216 -144.41775 -155.46163
## [31] -106.00173 -145.81977  -82.39580  -84.88564 -156.92077  -49.98792
## [37] -141.26560 -126.25005 -165.69881 -129.67554
##  [1] 9.835140 9.854379 9.658130 9.788529 9.958288 9.982604 9.874494
##  [8] 9.775709 9.805462 9.818819 9.903764 9.816718 9.858738 9.807711
## [15] 9.940360 9.678566 9.849708 9.946512 9.778923 9.777436 9.881158
## [22] 9.791882 9.689455 9.664901 9.883178 9.892371 9.693516 9.778924
## [29] 9.574346 9.878137 9.909861 9.706640 9.881069 9.860808 9.713992
## [36] 9.889008 9.734380 9.794439 9.742642 9.959924

prep <- estimateEffect(1:k ~ Party + s(Time), stmFit, meta = out$meta, uncertainty = "Global")

Result <- plot.estimateEffect(prep, "Party", method = "difference", 
                              cov.value1 = "Democratic", cov.value2 = "Republican", 
                              verbose.labels = F, 
                              ylab = "Expected Difference in Topic Probability by Party \n (with 95% Confidence Intervals)", 
                              xlab = "More Likely Republican                         Not Significant                          More Likely Democratic",
                              main = "Effect of Party on Topic Prevelance for Facebook Posts of NC State Senators",
                              xlim = c(-0.08,0.08))

# order based on Expected Topic Proportion
rank = order(unlist(Result$means))
topic <- topic[rank,]

par(mfrow = c(1,1),mar = c(6, 6, 4, 4))
STMresults <- plot.estimateEffect(prep, "Party", method = "difference", cov.value1 = "Democratic", 
                                  cov.value2 = "Republican", 
                                  topics = topic$TopicNumber,
                                  verbose.labels = F, 
                                  ylab = "Expected Difference in Topic Probability by Party \n (with 95% Confidence Intervals)", 
                                  labeltype = "custom",
                                  xlab = "More Likely Republican                         Not Significant                          More Likely Democratic",
                                  custom.labels  = topic$topicnames, 
                                  main = "Effect of Party on Topic Prevelance for Facebook Posts of NC State Senators",
                                  xlim = c(-.08,0.08))

# time
par(mfrow = c(2,2),mar = c(4,4,2,2))
for (i in 1:k){
  plot.estimateEffect(prep, "Time", method = "continuous", topics = rank[i], model = z,  
                      main = paste0(topic$topicnames[i],": Topic ",i),
                      printlegend = FALSE, ylab = "Exp. Topic Prob", 
                      xlab = "Time (Month, 1 = Jan 2015 to 24 = Dec 2016)", ylim = c(-0.01,0.2)
                      )
}

Comparing Presidential Election

# Pres Election and Hillary Clinton
plot.STM(stmFit, type = "perspectives", topics = c(11,29), n=30, plabels = c("Presidential Election","Hillary Clinton"))

Topic Correlations

Let’s create an interactive network for the topics (nodes) in which an edge represents a significant correlation between the topic. The size of the node is the prevalence (expected topic proportion) for the topic.

library(igraph); library(visNetwork)
par(mfrow = c(1,1))

mod.out.corr <- topicCorr(stmFit, cutoff = .025)

#library(corrplot)
#corrplot(mod.out.corr$cor, order="hclust", hclust.method="ward.D2", method = "circle", type = "lower", diag = F)

#mod.out.corr <- topicCorr(stmFit, method = "huge")

links2 <- as.matrix(mod.out.corr$posadj)
net2 <- graph_from_adjacency_matrix(links2, mode = "undirected")
table(V(net2)$type)
## < table of extent 0 >
net2 <- simplify(net2, remove.multiple = F, remove.loops = T) 

links <- as_data_frame(net2, what="edges")
nodes <- as_data_frame(net2, what="vertices")

# Community Detection
clp <- cluster_label_prop(net2)
nodes$community <- clp$membership

means <- as.data.frame(unlist(STMresults$means))
colnames(means) <- "means"
color <- colorRamp(c("white","blue"))(abs(means$means)/0.05)
means$colorDem <- rgb(color[,1],color[,2],color[,3],  maxColorValue=255)

color <- colorRamp(c("white","red"))(abs(means$means)/0.05)
means$colorRep <- rgb(color[,1],color[,2],color[,3],  maxColorValue=255)

means$color <- ifelse(means$means>0,means$colorDem,means$colorRep)

#visNetwork edits
nodes$shape <- "dot"  
nodes$shadow <- TRUE # Nodes will drop shadow
nodes$title <- apply(topicNames$prob, 1, function(x) paste0(x, collapse = " + "))[rank] # Text on click
nodes$label <- topic$topicnames # Node label
nodes$size <- (topic$TopicProportions / max(topic$TopicProportions)) * 40 # Node size
nodes$borderWidth <- 2 # Node border width

nodes$color.background <- means$color
nodes$color.border <- "black"
nodes$color.highlight.background <- "orange"
nodes$color.highlight.border <- "darkred"
nodes$id <- topic$TopicNumber

visNetwork(nodes, links, width="100%",  height="600px", main="NC State Senator Topic (Correlation) Network") %>% visOptions(highlightNearest = list(enabled = TRUE, algorithm = "hierarchical")) %>% 
  visGroups(groupname = "Republican", color = "red") %>%
  visGroups(groupname = "Democrat", color = "blue") %>%
  visLegend(main = list(text = "Custom Legend",
 style = "font-family:Comic Sans MS;color:#ff0000;font-size:12px;text-align:center;"))